# -*- coding: utf-8 -*-

from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
# Site root WITHOUT a trailing slash: the hrefs collected below already start
# with "/", so joining with a trailing slash would produce "//wiki/...".
WIKI_BASE_URL = "https://en.wikipedia.org"
MAIN_PAGE_URL = WIKI_BASE_URL + "/wiki/Main_Page"

# Matches hrefs that start with "/wiki/".
WIKI_HREF_PATTERN = re.compile(r"^/wiki/")
# Matches hrefs that end in ".jpg" or ".JPG"; such links are filtered out.
JPG_HREF_PATTERN = re.compile(r"\.(jpg|JPG)$")


def is_jpg_link(href: str) -> bool:
    """Return True if *href* ends with ``.jpg`` or ``.JPG``."""
    return JPG_HREF_PATTERN.search(href) is not None


def to_absolute_url(href: str) -> str:
    """Return the absolute URL for a site-relative *href* such as ``/wiki/X``."""
    return WIKI_BASE_URL + href


def fetch_html(url: str) -> str:
    """Download *url* and return the response body decoded as UTF-8."""
    # The context manager closes the HTTP response even if read/decode fails.
    with urlopen(url) as response:
        return response.read().decode("utf-8")


def iter_wiki_links(html: str):
    """Yield ``(link_text, absolute_url)`` for each wanted link in *html*.

    A link is wanted when it is an ``<a>`` tag whose ``href`` starts with
    ``/wiki/`` and does not end in ``.jpg`` / ``.JPG``.
    """
    soup = BeautifulSoup(html, "html.parser")
    for anchor in soup.find_all("a", href=WIKI_HREF_PATTERN):
        href = anchor["href"]
        if is_jpg_link(href):
            continue
        yield anchor.get_text(), to_absolute_url(href)


def main() -> None:
    """Print the text and absolute URL of every wanted link on the main page."""
    for text, link in iter_wiki_links(fetch_html(MAIN_PAGE_URL)):
        print(text, "<-- -->", link)


if __name__ == "__main__":
    main()
